package com.whit.demo;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.HashSet;
import java.util.Set;

/**
 * Local word counter for a plain-text English article (intended for large files).
 *
 * <p>The input file is read line by line, each line is split on whitespace, and every
 * token that is not in the ignore list is printed and counted. The final total is
 * printed at the end and is also left in {@link #count}.
 *
 * <p>TODO: filter out prepositions and other words that should not be counted.
 * <br>TODO: save the words to a file and support additional input data files.
 */
public class TestWordCount2 {
	/** Input file used when no path is passed on the command line. */
	static final String DEFAULT_INPUT_PATH = "C:\\data\\mywords\\article";

	/** Number of words counted by the most recent run of {@link #main(String[])}. */
	static int count = 0;

	/** Tokens that are skipped: neither printed nor counted. Matching is case-sensitive. */
	static String[] passedWords = {
			"Passage","a","an","What","Where","Who",
			"and","of","one","We","You","you","we","he","He","She","she",
			"A","A)","C)","C","B)","B","D)","D"};

	/** Set view of {@link #passedWords}, filled once at class-initialization time. */
	static HashSet<String> ignorePassedWords = new HashSet<>();
	static {
		for (String s : passedWords) {
			ignorePassedWords.add(s);
		}
	}

	/**
	 * Reads the article file, prints every counted word on its own line, then prints the total.
	 *
	 * @param args optional; {@code args[0]} is the path of the file to read. When absent,
	 *             {@link #DEFAULT_INPUT_PATH} is used.
	 * @throws Exception if the file cannot be opened or read
	 */
	public static void main(String[] args) throws Exception {
		String path = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
		File file = new File(path);

		// Start from zero on every run so repeated invocations do not accumulate.
		count = 0;

		// try-with-resources guarantees the reader is closed even if reading fails.
		// NOTE: FileReader decodes using its default charset.
		try (BufferedReader br = new BufferedReader(new FileReader(file))) {
			String line;
			while ((line = br.readLine()) != null) {
				// Split on runs of any whitespace so tabs and repeated spaces act as one separator.
				for (String w : line.split("\\s+")) {
					if (isCounted(w)) {
						System.out.println(w);
						count++;
					}
				}
			}
		}
		System.out.println("共计有：" + count + "个单词");
	}

	/**
	 * Decides whether a token counts as a word.
	 *
	 * @param token one whitespace-delimited piece of a line
	 * @return {@code false} for empty tokens (produced by blank lines or leading whitespace)
	 *         and for tokens in {@link #ignorePassedWords}; {@code true} otherwise
	 */
	private static boolean isCounted(String token) {
		return !token.isEmpty() && !ignorePassedWords.contains(token);
	}
}
